library(tidyverse)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(reshape2)
library(dplyr)
library(network)
library(ggraph)
library(GGally)
library(sna)
library(ggplot2)
library(ggnetwork)
library(ggraph)
library(tidygraph)
library(ggsci)
library(data.table)
library(reshape2)
library(tidymodels)
library(furrr)
library(R.matlab)
library(BiocParallel)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(gtools)
library(graphlayouts)

net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/ast/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
resources_dir = params$resources_dir

# BN folders 
data_prefix = "ast_m19"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
cat("Preparing phenotypes...")
## Preparing phenotypes...
### phenotype
phenotype = readRDS(paste0(resources_dir, "phenotypes/basic_Apr2022.rds"))
phenotype_dt = phenotype$data
metadata = phenotype$data

### Create custom phenotypes and includes resilience
phenotype_dt$ad_dementia_status = ifelse(phenotype_dt$cogdx_3gp<3,0,1)
phenotype_dt$cognitive_impairment_status = ifelse(phenotype_dt$cogdx_3gp<2,0,1)
phenotype_dt$gpath_sqrt = sqrt(phenotype_dt$gpath)
phenotype_dt$amyloid_sqrt = sqrt(phenotype_dt$amyloid)
phenotype_dt$tangles_sqrt = sqrt(phenotype_dt$tangles)
phenotype_dt$nft_sqrt = sqrt(phenotype_dt$nft)
phenotype_dt$plaq_d_sqrt = sqrt(phenotype_dt$plaq_d)
phenotype_dt$plaq_n_sqrt = sqrt(phenotype_dt$plaq_n)
phenotype_dt$tdp_43_binary = phenotype_dt$tdp_st2
phenotype_dt$dxpark_status = phenotype_dt$dxpark-1
resilience = read.csv(paste0(resources_dir, "resilience/resilience_capuano_july2022/R719_CR_ROSMAP.csv"))
resilience$projid2 = sprintf("%08d", resilience$projid) # Add leading zeros 
phenotype_dt = phenotype_dt %>% dplyr::left_join(resilience[,-1], by = c("projid"="projid2"))

# save(phenotype_dt, file = paste0(bn_results_dir,"phenotypes.RData"))

cat("Saved:", paste0(bn_results_dir,"phenotypes.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/phenotypes.RData

Expression data

cat("Loading SE modules...")
## Loading SE modules...
mod2plot = 19 # Module we want to plot
# pval_cutoff = 0.05  # p-value cutoff for the BN
top_n_genes = 100  # Number of top genes to include in the BN

# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "ast.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy

# Select the expression values for the module of interest 
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]

# save(expr_matx_mod, file = paste0(bn_results_dir,"dataExp.RData"))
cat("Selecting modules based on association results...")
## Selecting modules based on association results...
load(paste0(bn_results_dir,"phenotypes.RData"))
load(paste0(bn_results_dir,"dataExp.RData"))

phenotype_match = phenotype_dt[match(colnames(expr_matx_mod),phenotype_dt$projid),]
expr_matx_mod_t <- as.matrix(t(expr_matx_mod))

#identical(rownames(data4linear_reg),phenotype_match$projid) # TRUE

#Covariates
covariates_ = c( "cogng_demog_slope",
                "tangles_sqrt", 
                "amyloid_sqrt")
adj_by = c("msex","age_death")

data4linear_reg = cbind(expr_matx_mod_t,phenotype_match[,covariates_,drop=F],phenotype_match[,adj_by,drop=F])
scale_data = TRUE
matrix_pvalue = matrix(NA, nrow = length(covariates_), ncol = ncol(expr_matx_mod_t))
for (x in 1:length(covariates_)){
  for (y in 1:ncol(expr_matx_mod_t)){
    x_cov = covariates_[x]
    y_mod = colnames(data4linear_reg)[y]
    if(scale_data){
      form = as.formula(paste(paste0("scale(",y_mod,")"),"~",paste0("scale(",x_cov,")"),"+",paste(adj_by,collapse = "+")))  
    }else{
      form = as.formula(paste(y_mod,"~",x_cov,"+",paste(adj_by,collapse = "+")))
    }
    matrix_pvalue[x,y] <- glance(summary( lm(form, data = data4linear_reg) ))$p.value 
  }
}
rownames(matrix_pvalue) = covariates_
colnames(matrix_pvalue) = colnames(expr_matx_mod_t)
matrix_pvalue_adj = matrix(p.adjust(as.vector(as.matrix(matrix_pvalue)), method='bonferroni'),ncol=ncol(matrix_pvalue))

# signifMods = colnames(matrix_pvalue)[colSums(matrix_pvalue_adj<=pval_cutoff)>0]

matrix_pvalue_m = reshape2::melt(as.matrix(matrix_pvalue)) %>% 
  group_by(Var2) %>% summarise(pval = min(value)) %>% arrange(pval)

top_genes = as.character(matrix_pvalue_m$Var2[1:top_n_genes])

# save(signifMods, file = paste0(bn_results_dir,"selected_dataExp.RData"))
cat("Saved:", paste0(bn_results_dir,"selected_dataExp.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/selected_dataExp.RData
# Save the input expression for the BN
# write_csv(as.data.frame(expr_matx_mod)[top_genes,], file = BN_run_data, col_names = F)
cat("Saved:", BN_run_data)
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/ast_m19_exp.txt
# CINDERellA has two inputs: exp.txt and output_folder 
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=1000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
## [1] "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/ast_m19_res"

Read results

Edges filtered

# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")

# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))

edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)

dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input

edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)

edges_filtered = edges_df[abs(edges_df$weight)>0.3, ] # weight default = 0.33
rownames(edges_filtered) = NULL

# createDT(edges_filtered %>% arrange(-weight))

Nodes filtered

nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))

nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ] 
createDT(nodes_filtered %>% left_join(reshape2::melt(as.matrix(matrix_pvalue)), by = c("altName"="Var2")) %>%
  group_by(altName) %>% mutate(best_pval = min(value), 
                               best_pheno = Var1[which.min(value)],
                               pvalues = paste0(formatC(value, format = "e", digits = 2), collapse = ";"), 
                               phenotypes = paste0(Var1, collapse = ";")) %>% 
  select(nodeName, altName, gene_name, best_pval, best_pheno, pvalues, phenotypes) %>%
  distinct() %>% arrange(best_pval))

BN plot

# Cairo::CairoPDF(file = "/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/v2_mar2024/bn_ast19.pdf", width = 16, height = 10)
plot_geneBN(edges_filtered = edges_filtered,
          nodes_filtered = nodes_filtered)
## Using `stress` as default layout

# dev.off()

Session

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggforce_0.3.3         igraph_1.5.1          graphlayouts_0.8.0   
##  [4] gtools_3.9.4          RColorBrewer_1.1-3    circlize_0.4.14      
##  [7] ComplexHeatmap_2.11.1 BiocParallel_1.28.3   R.matlab_3.6.2       
## [10] furrr_0.3.1           future_1.33.0         yardstick_1.2.0      
## [13] workflowsets_1.0.1    workflows_1.1.3       tune_1.1.2           
## [16] rsample_1.2.0         recipes_1.0.8         parsnip_1.1.1        
## [19] modeldata_1.2.0       infer_1.0.5           dials_1.2.0          
## [22] scales_1.2.1          broom_1.0.5           tidymodels_1.1.1     
## [25] data.table_1.14.8     ggsci_2.9             tidygraph_1.2.0      
## [28] ggnetwork_0.5.10      sna_2.6               statnet.common_4.5.0 
## [31] GGally_2.1.2          ggraph_2.0.5          network_1.17.1       
## [34] reshape2_1.4.4        kableExtra_1.3.4      forcats_0.5.1        
## [37] stringr_1.5.0         dplyr_1.1.3           purrr_1.0.2          
## [40] readr_2.1.4           tidyr_1.3.0           tibble_3.2.1         
## [43] ggplot2_3.4.4         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.1    readxl_1.3.1        backports_1.4.1    
##   [4] systemfonts_1.0.4   plyr_1.8.9          splines_4.1.2      
##   [7] crosstalk_1.2.0     listenv_0.9.0       digest_0.6.33      
##  [10] foreach_1.5.2       htmltools_0.5.7     viridis_0.6.2      
##  [13] fansi_1.0.5         magrittr_2.0.3      cluster_2.1.2      
##  [16] doParallel_1.0.17   tzdb_0.4.0          globals_0.16.2     
##  [19] modelr_0.1.8        gower_1.0.1         matrixStats_1.0.0  
##  [22] vroom_1.6.4         R.utils_2.12.2      svglite_2.1.0      
##  [25] hardhat_1.3.0       colorspace_2.1-0    rvest_1.0.2        
##  [28] ggrepel_0.9.4       haven_2.4.3         xfun_0.41          
##  [31] crayon_1.5.2        jsonlite_1.8.7      survival_3.2-13    
##  [34] iterators_1.0.14    glue_1.6.2          polyclip_1.10-6    
##  [37] gtable_0.3.4        ipred_0.9-14        webshot_0.5.2      
##  [40] GetoptLong_1.0.5    shape_1.4.6         future.apply_1.11.0
##  [43] BiocGenerics_0.40.0 DBI_1.1.3           Rcpp_1.0.11        
##  [46] viridisLite_0.4.2   clue_0.3-60         bit_4.0.5          
##  [49] GPfit_1.0-8         DT_0.30             stats4_4.1.2       
##  [52] lava_1.7.2.1        prodlim_2023.08.28  htmlwidgets_1.6.2  
##  [55] httr_1.4.7          ellipsis_0.3.2      pkgconfig_2.0.3    
##  [58] reshape_0.8.9       R.methodsS3_1.8.2   farver_2.1.1       
##  [61] nnet_7.3-16         sass_0.4.7          dbplyr_2.4.0       
##  [64] utf8_1.2.4          labeling_0.4.3      tidyselect_1.2.0   
##  [67] rlang_1.1.2         DiceDesign_1.9      munsell_0.5.0      
##  [70] cellranger_1.1.0    tools_4.1.2         cachem_1.0.8       
##  [73] cli_3.6.1           generics_0.1.3      evaluate_0.23      
##  [76] fastmap_1.1.1       yaml_2.3.7          bit64_4.0.5        
##  [79] knitr_1.45          fs_1.6.3            R.oo_1.25.0        
##  [82] xml2_1.3.5          compiler_4.1.2      rstudioapi_0.15.0  
##  [85] png_0.1-8           reprex_2.0.1        lhs_1.1.6          
##  [88] tweenr_1.0.2        bslib_0.5.1         stringi_1.7.12     
##  [91] highr_0.10          lattice_0.20-45     Matrix_1.6-1.1     
##  [94] vctrs_0.6.4         pillar_1.9.0        lifecycle_1.0.3    
##  [97] GlobalOptions_0.1.2 jquerylib_0.1.4     R6_2.5.1           
## [100] gridExtra_2.3       IRanges_2.28.0      parallelly_1.36.0  
## [103] codetools_0.2-18    MASS_7.3-54         rjson_0.2.21       
## [106] withr_2.5.2         S4Vectors_0.32.4    parallel_4.1.2     
## [109] hms_1.1.3           rpart_4.1-15        timeDate_4022.108  
## [112] coda_0.19-4         class_7.3-19        rmarkdown_2.25     
## [115] lubridate_1.8.0